*************************************************************************
10/19/17:  ADDED SY 2018

11/24/17:  ADDED SY 2017

10/19/15:  ADDED SY 2015

11/3/14:  ADDED SY 2014

12/9/13:  ADDED SY 2013

12/22/12:  ADDED SY 2012

11/4/11:  ADDED SY 2011

11/4/10:  ADDED 2010 SY DATA, BUT THESE DATA DO NOT INCLUDE NON-CASH BENEFITS OR 
		  STATE TAXES, ONLY RAN RESULTS THROUGH CREATING MELLY DATA.  NOTE, ONLY GO
		  THROUGH 2008 FOR MELLY DATA DUE TO MISSING STATE TAXES

2/4/10:  RE-RAN AFTER UPDATED 2009 SY DATA SO NOW INCLUDES NON-CASH BENEFITS
		 (ONLY RE-RAN, UP TO OUTFILE4, NOT THE DECOMPS)

6/2/10:  RE-RAN 60S AND 70S ONLY, AFTER UPDATED 64-71 SY DATA SO NOW HAVE ALL OF 63-08

*************************************************************************;

*************************************************************************
CALCULATE THE IMPORTANCE OF THE EQUIVALENCE SCALE FOR INEQUALITY
INPUT DATA SET IS cpsdata.poverty2018
NOTE(review): THIS BANNER USED TO SAY 2016 WHILE THE STEP READS THE
              poverty2018 FILE - CONFIRM WHICH YEAR IS INTENDED
*************************************************************************;

/* Build a working copy with an equivalence scale and scaled income. */
DATA temp;
   SET cpsdata.poverty2018;

   /* hrelu18 members get a weight of 0.7, everyone else a weight of 1, */
   /* and the weighted count is raised to the 0.7 power.                */
   /* Presumably hrelu18 counts related children under 18 - confirm.    */
   hscale = (hpersons - hrelu18 + .7*hrelu18)**.7;

   /* Scale-adjusted income */
   hinc1 = htotval / hscale;
RUN;

/* Weighted percentiles of unscaled (unsc_) and scaled (sc_) income.    */
/* The prefixes are matched to the VAR variables in the order listed.   */
PROC UNIVARIATE DATA=temp;
   WEIGHT msupwgt;
   VAR    htotval hinc1;
   OUTPUT OUT      = out_temp
          PCTLPTS  = 5 10 20 25 40 50 60 75 80 90 95
          PCTLPRE  = unsc_ sc_
          PCTLNAME = p5 p10 p20 p25 p40 p50 p60 p75 p80 p90 p95;
RUN;

/* Show only the 10th and 90th percentiles of each income measure. */
PROC PRINT DATA=out_temp;
   VAR unsc_p10 unsc_p90
       sc_p10   sc_p90;
RUN;


*************************************************************************
GENERATE WEIGHTED INCOME PERCENTILES BY REFERENCE YEAR, ONE DECADE FILE
AT A TIME

THE FOUR DECADE SECTIONS WERE IDENTICAL EXCEPT FOR THE INPUT DATA SET,
THE NUMBER OF INCOME VARIABLES, THE OUTPUT DATA SET, THE FILEREF AND THE
OUTPUT PATH, SO THEY ARE NOW DRIVEN BY ONE MACRO. EACH CALL BELOW CAN
STILL BE SUBMITTED ON ITS OWN TO RE-RUN A SINGLE DECADE
*************************************************************************;

%MACRO pctl_by_year(indata=, nvars=, outdata=, fref=, outpath=);
   /* Parameters:                                                        */
   /*   indata  - input data set, sorted IN PLACE by ref_year            */
   /*   nvars   - number of income variables, uses inc1 through incN     */
   /*   outdata - data set that receives the percentiles                 */
   /*   fref    - fileref assigned to the text output file               */
   /*   outpath - quoted physical path of the text output file           */
   /*                                                                    */
   /* Writes one record per ref_year: ref_year followed by the 11        */
   /* percentiles of inc1, then the 11 percentiles of inc2, and so on.   */
   %LOCAL pts npts v k prefixes names putvars;

   %LET pts  = 5 10 20 25 40 50 60 75 80 90 95;
   %LET npts = %SYSFUNC(COUNTW(&pts, %STR( )));

   /* Percentile suffixes: p5 p10 ... p95 */
   %DO k = 1 %TO &npts;
      %LET names = &names p%SCAN(&pts, &k, %STR( ));
   %END;

   /* Prefixes (inc1_ inc2_ ...) and the full PUT list                   */
   /* (inc1_p5 ... inc1_p95 inc2_p5 ...), in the same order that was     */
   /* previously typed out by hand.                                      */
   %DO v = 1 %TO &nvars;
      %LET prefixes = &prefixes inc&v._;
      %DO k = 1 %TO &npts;
         %LET putvars = &putvars inc&v._p%SCAN(&pts, &k, %STR( ));
      %END;
   %END;

   /* BY-group processing below needs the data ordered by ref_year. */
   PROC SORT DATA=&indata;
      BY ref_year;
   RUN;

   PROC UNIVARIATE DATA=&indata;
      VAR inc1-inc&nvars;
      BY ref_year;
      WEIGHT msupwgt;
      OUTPUT OUT=&outdata PCTLPTS=&pts
                          PCTLPRE=&prefixes
                          PCTLNAME=&names;
   RUN;

   /* List output: values separated by single blanks, one line per year. */
   FILENAME &fref &outpath;
   DATA _null_;
      SET &outdata;
      FILE &fref lrecl=1500;
      PUT ref_year &putvars;
   RUN;
%MEND pctl_by_year;

*************************************************************************
GENERATE RESULTS FOR 1960S AND 1970S
*************************************************************************;
%pctl_by_year(indata=cpsdata.inequality_6479, nvars=4, outdata=out_dat1,
              fref=outfile1,
              outpath='C:\Users\worri\Dropbox\Poverty\Inequality\ineq_6479.dat');

*************************************************************************
GENERATE RESULTS FOR 1980S
*************************************************************************;
%pctl_by_year(indata=cpsdata.inequality_8089, nvars=10, outdata=out_dat2,
              fref=outfile2,
              outpath='C:\Users\worri\Dropbox\Poverty\Inequality\ineq_8089.dat');

*************************************************************************
GENERATE RESULTS FOR 1990S
*************************************************************************;
%pctl_by_year(indata=cpsdata.inequality_9099, nvars=10, outdata=out_dat3,
              fref=outfile3,
              outpath='C:\Users\worri\Dropbox\Poverty\Inequality\ineq_9099.dat');

*************************************************************************
GENERATE RESULTS FOR 2000S
*************************************************************************;
%pctl_by_year(indata=cpsdata.inequality_0018, nvars=10, outdata=out_dat4,
              fref=outfile4,
              outpath='C:\Users\worri\Dropbox\Poverty\Inequality\ineq_0018.dat');

******************************************************************************
GENERATE RESULTS FOR INEQUALITY BY DEMOGRAPHIC GROUP
******************************************************************************;

/* Stack the four decade files, keeping only what this section needs.     */
/* The files are concatenated rather than interleaved BY ref_year: an     */
/* interleave fails unless every input is already sorted by ref_year,     */
/* which made this section depend on the decade sections having sorted    */
/* the permanent data sets first. The PROC SORT that follows sets the     */
/* order used by the BY-group analysis, so nothing is lost.               */
DATA temp_grp;
   SET cpsdata.inequality_6479(KEEP=ref_year inc2 dem_grp msupwgt)
       cpsdata.inequality_8089(KEEP=ref_year inc2 dem_grp msupwgt)
       cpsdata.inequality_9099(KEEP=ref_year inc2 dem_grp msupwgt)
       cpsdata.inequality_0018(KEEP=ref_year inc2 dem_grp msupwgt);
RUN;

PROC SORT DATA=temp_grp;
   BY dem_grp ref_year;
RUN;

/* Weighted percentiles of inc2 within each dem_grp and ref_year cell. */
PROC UNIVARIATE DATA=temp_grp;
   VAR inc2;
   BY dem_grp ref_year;
   WEIGHT msupwgt;
   OUTPUT OUT=out_dat_by_grp PCTLPTS=5 10 20 25 40 50 60 75 80 90 95
                     PCTLPRE=inc2_
                     PCTLNAME=p5 p10 p20 p25 p40 p50 p60 p75 p80 p90 p95;
RUN;

/* One record per dem_grp and ref_year, values separated by blanks.       */
/* NOTE(review): the file name still says 6417 although the input now     */
/* includes inequality_0018 - left unchanged in case other programs read  */
/* this path, confirm before renaming.                                    */
FILENAME outfile8 'C:\Users\worri\Dropbox\Poverty\Inequality\ineq_6417_by_grp.dat';
DATA _null_;
   SET out_dat_by_grp;
   FILE outfile8 lrecl=1500;
   PUT dem_grp ref_year
       inc2_p5 inc2_p10 inc2_p20 inc2_p25 inc2_p40 inc2_p50
       inc2_p60 inc2_p75 inc2_p80 inc2_p90 inc2_p95;
RUN;



******************************************************************************
GENERATE DATA FOR MELLY DECOMPOSITION
******************************************************************************;

/* One extract per decade file, restricted to the comparison years.       */
/* The 6479 extract does not keep r_inc6, so that column is written as    */
/* missing for its rows in the output file.                               */
DATA temp1;
   SET cpsdata.inequality_6479(KEEP=ref_year msupwgt r_inc1 r_inc2
                                    dem_grp emp_head age_head
                                    r1-r3 fam1-fam5 ed1-ed4);
   WHERE ref_year IN (1963, 1972);
RUN;

DATA temp2;
   SET cpsdata.inequality_8089(KEEP=ref_year msupwgt r_inc1 r_inc2 r_inc6
                                    dem_grp emp_head age_head
                                    r1-r3 fam1-fam5 ed1-ed4);
   WHERE ref_year = 1980;
RUN;

DATA temp3;
   SET cpsdata.inequality_9099(KEEP=ref_year msupwgt r_inc1 r_inc2 r_inc6
                                    dem_grp emp_head age_head
                                    r1-r3 fam1-fam5 ed1-ed4);
   WHERE ref_year = 1990;
RUN;

DATA temp4;
   SET cpsdata.inequality_0018(KEEP=ref_year msupwgt r_inc1 r_inc2 r_inc6
                                    dem_grp emp_head age_head
                                    r1-r3 fam1-fam5 ed1-ed4);
   WHERE ref_year IN (2000, 2017);
RUN;

/* Stack the extracts and write one blank-delimited record per row. */
FILENAME outfile6 'C:\Users\worri\Dropbox\Poverty\Inequality\melly6317.dat';
DATA _null_;
   SET temp1
       temp2
       temp3
       temp4;
   FILE outfile6 lrecl=1500;
   PUT ref_year msupwgt
       r_inc1 r_inc2 r_inc6
       dem_grp emp_head age_head
       r1 r2 r3
       fam1 fam2 fam3 fam4 fam5
       ed1 ed2 ed3 ed4;
RUN;


******************************************************************************
GENERATE DATA FOR AEI PROJECT MEDIAN AND 10TH PCTILE DECOMPOSITION
******************************************************************************;

/* Extract 1980 from the 1980s file. This overwrites WORK.temp1. */
DATA temp1;
   SET cpsdata.inequality_8089(KEEP=ref_year msupwgt r_inc7 emp_head
                                    r1-r3 fam1-fam5 ed1-ed4);
   WHERE ref_year = 1980;
RUN;

/* Extract 2000 and 2017 from the 2000s file. This overwrites WORK.temp2. */
DATA temp2;
   SET cpsdata.inequality_0018(KEEP=ref_year msupwgt r_inc7 emp_head
                                    r1-r3 fam1-fam5 ed1-ed4);
   WHERE ref_year IN (2000, 2017);
RUN;

/* Stack both extracts and write one blank-delimited record per row. */
FILENAME outfile7 'C:\Users\worri\Dropbox\Poverty\Inequality\aei_decomp80_17.dat';
DATA _null_;
   SET temp1
       temp2;
   FILE outfile7 lrecl=1500;
   PUT ref_year msupwgt r_inc7 emp_head
       r1 r2 r3
       fam1 fam2 fam3 fam4 fam5
       ed1 ed2 ed3 ed4;
RUN;




******************************************************************************
JMP DECOMPOSITION
******************************************************************************;

/* Stack all four decade files with only the variables used below. */
DATA inequality;
   SET cpsdata.inequality_6479(KEEP=ref_year counter ln_r_inc2 msupwgt)
       cpsdata.inequality_8089(KEEP=ref_year counter ln_r_inc2 msupwgt)
       cpsdata.inequality_9099(KEEP=ref_year counter ln_r_inc2 msupwgt)
       cpsdata.inequality_0018(KEEP=ref_year counter ln_r_inc2 msupwgt);
RUN;

****************************************************
MEANS, ALL YEARS, TO GET BETA BAR
****************************************************;
PROC SORT DATA=inequality;
   BY counter;
RUN;

/* beta_bar is the mean of ln_r_inc2 within each counter value, pooled   */
/* over all years.                                                       */
/* NOTE(review): no WEIGHT statement here although the percentile steps  */
/* weight by msupwgt - confirm the unweighted mean is intended.          */
PROC MEANS DATA=inequality;
   BY counter;
   VAR ln_r_inc2;
   OUTPUT OUT=mean_out1 MEAN=beta_bar;
RUN;

/* Attach beta_bar to every record that shares the counter value. */
DATA inequality;
   MERGE inequality
         mean_out1(KEEP=counter beta_bar);
   BY counter;
RUN;

/* Summary statistics as a check on the merge. */
PROC MEANS DATA=inequality;
RUN;

PROC SORT DATA=inequality;
   BY ref_year;
RUN;

/* Add 0/1 indicators grp_dum1-grp_dum40, one per counter value 1-40,    */
/* and a row identifier taken from the row position after the sort.     */
DATA inequality;
   SET inequality;
   ARRAY grp_dum{40} grp_dum1-grp_dum40;

   DO g = 1 TO 40;
      grp_dum{g} = (counter = g);
   END;

   newid = _N_;
   DROP g;
RUN;

/* Summary statistics as a check on the indicators. */
PROC MEANS DATA=inequality;
RUN;
****************************************************
REGRESSIONS, BY YEAR, TO GET THETA AND F INV BAR
****************************************************;

/* For each ref_year, regress ln_r_inc2 on the group indicators with     */
/* grp_dum1 left out of the model. Parameter estimates go to est_3.      */
/* Fitted values (yhat) and residuals (resid) go to reg_out1, with the   */
/* indicator columns dropped from that output.                           */
/* NOTE(review): no WEIGHT statement, so the fit is unweighted even      */
/* though later percentile steps weight by msupwgt - confirm intended.   */
PROC REG DATA=inequality OUTEST=est_3;
   BY ref_year;
   ID newid;
   MODEL ln_r_inc2 = grp_dum2-grp_dum40;
   OUTPUT OUT=reg_out1(DROP=grp_dum1-grp_dum40)
          PREDICTED=yhat
          RESIDUAL=resid;
RUN;

***********************************************
* CALCULATE THETA FOR EACH YEAR IN MASTER DATA
***********************************************;

/* Weighted percentiles 1-100 of the residuals within each ref_year, */
/* stored as resid_p1-resid_p100.                                    */
PROC UNIVARIATE DATA=reg_out1;
   BY ref_year;
   VAR resid;
   WEIGHT msupwgt;
   OUTPUT OUT=pctile_dat1
          PCTLPTS  = 1 TO 100 BY 1
          PCTLPRE  = resid_
          PCTLNAME = p1-p100;
RUN;

/* Give each record theta, the percentile (1-100) whose value is nearest */
/* to its residual within its own year. The bin edges are the midpoints  */
/* between adjacent percentile values.                                   */
DATA temp1;
   MERGE reg_out1(KEEP=newid resid yhat msupwgt ref_year beta_bar ln_r_inc2)
         pctile_dat1;
   BY ref_year;

   ARRAY rpct{100} resid_p1-resid_p100;

   /* Missing residuals are handled first because a missing value would  */
   /* otherwise satisfy the LE test for the bottom bin.                  */
   IF resid = . THEN theta = .;
   /* Bottom bin: at or below the midpoint of percentiles 1 and 2 */
   ELSE IF resid LE resid_p1+((resid_p2-resid_p1)/2) THEN theta = 1;
   ELSE DO k = 2 TO 99;
      /* Interior bin k: from midpoint(k-1,k) up to, not including, midpoint(k,k+1) */
      IF rpct{k}-((rpct{k}-rpct{k-1})/2) LE resid LT rpct{k}+((rpct{k+1}-rpct{k})/2) THEN theta = k;
      /* Top bin: at or above the midpoint of percentiles 99 and 100 */
      ELSE IF resid GE resid_p99+((resid_p100-resid_p99)/2) THEN theta = 100;
   END;

   KEEP newid theta msupwgt resid yhat beta_bar ln_r_inc2 ref_year;
RUN;

/* Weighted distribution of theta as a check on the assignment. */
PROC FREQ DATA=temp1;
   TABLE theta;
   WEIGHT msupwgt;
RUN;

**************************************
* POOL ALL YEARS TO GET F INV BAR
**************************************;

/* Weighted percentiles 1-100 of the residuals over all years pooled, */
/* stored as resid_pool1-resid_pool100 in a one-row data set.         */
PROC UNIVARIATE DATA=reg_out1;
   VAR resid;
   WEIGHT msupwgt;
   OUTPUT OUT=pctile_dat2
          PCTLPTS  = 1 TO 100 BY 1
          PCTLPRE  = resid_
          PCTLNAME = pool1-pool100;
RUN;

DATA temp2;
   /* Read the single row of pooled percentiles once. Its values are */
   /* retained for every record read from temp1.                     */
   IF _N_ = 1 THEN SET pctile_dat2;
   SET temp1;

   ARRAY pooled{100} resid_pool1-resid_pool100;

   /* f_inv_bar is the pooled residual at this record's percentile rank. */
   /* It stays missing when theta is missing.                            */
   IF 1 LE theta LE 100 THEN f_inv_bar = pooled{theta};

   /* y1 combines beta_bar with the pooled residual. y2 adds the gap */
   /* between the year-specific fitted value and beta_bar to y1.     */
   y1         = beta_bar+f_inv_bar;
   y2         = y1+yhat-beta_bar;
   y2_y1_diff = yhat-beta_bar;

   KEEP msupwgt resid yhat y1 y2 ln_r_inc2 ref_year y2_y1_diff beta_bar;
RUN;

/* Summary statistics as a check on the constructed series. */
PROC MEANS DATA=temp2;
RUN;

/* Weighted percentiles by year of each series used in the decomposition. */
/* The prefixes are matched to the VAR variables in the order listed, so  */
/* y3_ belongs to ln_r_inc2, y2y1_ to y2_y1_diff and bbar_ to beta_bar.   */
PROC UNIVARIATE DATA=temp2;
   BY ref_year;
   WEIGHT msupwgt;
   VAR ln_r_inc2 y1 y2 resid yhat y2_y1_diff beta_bar;
   OUTPUT OUT=out_jmp1
          PCTLPTS  = 10 20 25 50 75 80 90
          PCTLPRE  = y3_ y1_ y2_ resid_ yhat_ y2y1_ bbar_
          PCTLNAME = p10 p20 p25 p50 p75 p80 p90;
RUN;

/* One blank-delimited record per year. The column order is y1, y2, y3,  */
/* resid, yhat, y2y1, bbar, which differs from the VAR order above.      */
/* NOTE(review): the file name says 6409 although the input includes     */
/* inequality_0018 - confirm whether the name should be updated.         */
FILENAME outfile5 'C:\Users\worri\Dropbox\Poverty\Inequality\jmp6409.dat';
DATA _null_;
   SET out_jmp1;
   FILE outfile5 lrecl=1500;
   PUT ref_year
       y1_p10    y1_p20    y1_p25    y1_p50    y1_p75    y1_p80    y1_p90
       y2_p10    y2_p20    y2_p25    y2_p50    y2_p75    y2_p80    y2_p90
       y3_p10    y3_p20    y3_p25    y3_p50    y3_p75    y3_p80    y3_p90
       resid_p10 resid_p20 resid_p25 resid_p50 resid_p75 resid_p80 resid_p90
       yhat_p10  yhat_p20  yhat_p25  yhat_p50  yhat_p75  yhat_p80  yhat_p90
       y2y1_p10  y2y1_p20  y2y1_p25  y2y1_p50  y2y1_p75  y2y1_p80  y2y1_p90
       bbar_p10  bbar_p20  bbar_p25  bbar_p50  bbar_p75  bbar_p80  bbar_p90;
RUN;

